********************************************************************************************************************************
* This replication file estimates the potential consumer surplus gains/losses from switching notaries across current locations *
********************************************************************************************************************************
*1. Load the parameters and estimate the status quo.
*2. Remove a notary from each location and move it to randomly selected alternative locations until found one location which results in a CS-improvement and one location which results in a CS-decrease.
*3. Store the information.  

*******************************************************
* 1. Load the parameters and estimate the status quo. *
*******************************************************
clear all
capture log close
set more off
set trace off 
mata: mata set matastrict off
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
global starting_value `" 1 "' //If this is set to 1, then we begin with the status quo starting values.
set seed 1234
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)

mata: lambda=1
mata: profit_constraint=1

if $select_arrond == 3 {
global select_arrond_secondary `" 7 "' //This selects a particular judicial district.
}
else{
global select_arrond_secondary `" 0 "' //This selects a particular judicial district.
}

// Estimate and store information on the status quo for comparison. 
//Calculate the total demand for real estate on the market
qui su Q1 if common==1
sca sumQ1=r(sum)
sca di sumQ1
//Calculate the total demand for other transactions on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q1=scale*sumQ1/sumPopulation
sca di gamma_Q1
scalar gamma_Q2=scale*sumQ2/sumPopulation
sca di gamma_Q2
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q1=gamma_Q1*population
gen market_size_Q2=gamma_Q2*population

//Define the product for which demand will be estimated.
gen firm_demand_Q1=Q1 
gen firm_demand_Q2=Q2

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)
//Keep only observations in the selected judicial district
keep if arrondjud==$select_arrond | arrondjud==$select_arrond_secondary

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total_Q1 newid firm_demand_Q1 market_ID_num market_size_Q1 $controls_utility
global controls_total_Q2 newid firm_demand_Q2 market_ID_num market_size_Q2 $controls_utility

//Define newid, which is just firm_IDs which go from 1 to 1152.
egen newid = group(firm_ID)
// Generate a unique observation identifier, which is used to merge the results from the counterfactuals with the initial dataset
tostring market_ID_num, gen(market_ID_string)
tostring newid, gen(newid_string)
gen join="_"
egen unique_identifier=concat(newid_string join market_ID_string)
drop join market_ID_string newid_string

// Count the number of firms in the sample
bysort firm_ID:  gen dup_firm = cond(_N==1,0,_n)
count if dup_firm<2
sca n_firm=r(N)
// Count the number of markets in the subsample
bysort market_ID: gen nvals = _n == 1 
count if nvals
sca markets_number=r(N)
di markets_number
drop nvals
* Count the number of markets with a local notary
preserve
keep if common==1 //This denotes the local market of a notary
bysort market_ID: gen nvals = _n == 1 
count if nvals
sca covered_markets_current=r(N)
sca uncovered_markets_current=markets_number-covered_markets_current
su NotPerOff
sca total_notaries_current=r(sum)
restore

//Status quo estimation and descriptives
* Sort the data by market ID, since this will be the first relevant grouping (to calculate notary utility within a municipality)
sort market_ID_num newid
mata:
// Select the relevant variables and read them into mata
X_Q1    = st_data(., "$controls_total_Q1")
X_Q2    = st_data(., "$controls_total_Q2")
// nx counts the number of observations
nx    = rows(X_Q1)
// Add an intercept, with length equal to the number of observations 
X_Q1    = X_Q1,J(nx,1,1)
X_Q2    = X_Q2,J(nx,1,1)

// Calculate output, welfare, producer and consumer surplus under the status quo.
n_firm=st_numscalar("n_firm")
total_notaries_current=st_numscalar("total_notaries_current")
Results_ln_current_Q1=spdemand_log(X_Q1,betas_Q1)
X_Q1=sort(X_Q1,(3,1)) //sort by market and then firm_id
Results_ln_current_Q2=spdemand_log(X_Q2,betas_Q2)
X_Q2=sort(X_Q2,(3,1)) //sort by market and then firm_id
Results_current_Q1=exp(Results_ln_current_Q1[,2])
Results_current_Q2=exp(Results_ln_current_Q2[,2])
Results_current=Results_current_Q1+Results_current_Q2
output_current=round(sum(Results_current))
consumer_surplus_market_current=consumer_surplus(X_Q1,betas_Q1,transportation_cost)+consumer_surplus(X_Q2,betas_Q2,transportation_cost)
per_firm_variable_profit_current=per_firm_profit_mult_v2(X_Q1,X_Q2,betas_Q1,betas_Q2,p_Q1,p_Q2,nu,aL,phi,aM,alpha)
consumer_surplus_current=round(sum(consumer_surplus_market_current[,2]))
producer_surplus_current=round(sum(per_firm_variable_profit_current[,2])-fixed_cost*total_notaries_current)
welfare_current=round(consumer_surplus_current+producer_surplus_current)
   st_numscalar("output_current", output_current)
   st_numscalar("consumer_surplus_current", consumer_surplus_current)
   st_numscalar("producer_surplus_current", producer_surplus_current)
   st_numscalar("welfare_current", welfare_current)
//Generate a vector with the number of notaries in each office for every office-market combination
n_notaries=J(nx,1,0)
n_notaries=round(exp(X_Q1[.,6])) //The sixth column of X stores the log of the number of notaries in a given office
//Generate a vector with the number of notaries for every office
notary_vector=J(n_firm,1,0) //Make an empty notary vector
//Calculate the actual notaries per office under the status quo
for(f=1; f<=n_firm; f++) {
notary_vector[f,1]=sum(n_notaries:*(X_Q1[.,1]:==f))/(sum((X_Q1[.,1]:==f))) //This is essentially calculating the average number of notaries across all observations for the same notary office, which is just the number of notaries for that office.
}
	end
di consumer_surplus_current
di producer_surplus_current
di welfare_current

*********************************************************************************************************************************************************************************************************************
* 2. Remove a notary from each location and move it to randomly selected alternative locations until you have found one location which results in a CS-improvement and one location which results in a CS-decrease. *
*********************************************************************************************************************************************************************************************************************
*Notes for understanding:
* The notary vector is ordered by newid, which just goes over all locations and ranges from 1 to the number of locations under consideration.
//Make empty vectors where we will store the information on consumer surplus changes and producer surplus changes
mata: 
sum(notary_vector:!=0) //This is the number of non-zero offices in the dataset
max_rounds=n_firm //This is the maximum number of random draws that we allow before we give up looking for an improvement. We check all the locations in which there is an office. Once these have been checked, there is no reason to spend more time looking for an alternative location.
consumer_surplus_pos=J(n_firm,1,0) //In this matrix we will store the positive consumer surplus change from a switch.
consumer_surplus_neg=J(n_firm,1,0) //In this matrix we will store the negative consumer surplus change from a switch.
producer_surplus_pos=J(n_firm,1,0) //In this matrix we will store the change in producer surplus after a positive consumer surplus change.
producer_surplus_neg=J(n_firm,1,0) //In this matrix we will store the change in producer surplus after a negative consumer surplus change.
end

mata:
for (f=1; f<=n_firm; f++){ //Go over all locations (f is the index of each firm location)
if (notary_vector[f,1]>0) { //Only do removals at the existing locations
//Remove a notary from office f
notary_vector[f,1]=	notary_vector[f,1]-1 //Remove the notary to the notary vector where the f-th element shows how many notaries are active in location f.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==f),ln(notary_vector[f,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==f),ln(notary_vector[f,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
round=0 
//Take random draws without replacement to pick switching locations
id_of_switch=J(n_firm,1,.) //id_of_switch contains the id of the location to which we will consider switching
for(k=1;k<=n_firm;k++) id_of_switch[k,1]=k //We first start with just the numbers from 1 to the total number of offices (in other words, all ids of locations)
id_of_switch=id_of_switch,runiform(n_firm,1) //We concatinate to that vector of ordered ids a random uniform draw
id_of_switch=sort(id_of_switch,2) //we sort the ids based on the value of the random uniform draw, thus randomly ordering all candidate switching locations
 while ((consumer_surplus_pos[f,1]==0 & round <max_rounds) | (consumer_surplus_neg[f,1]==0 & round <max_rounds)) { //This checks if we have found a notary office which is better and a notary office which is worse.
round=round+1
//Add a notary to a random office i
i=id_of_switch[round,1] //Random draw of an office i
notary_vector[i,1]=	notary_vector[i,1]+1 //Add a notary to the notary vector in candidate switching location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==i),ln(notary_vector[i,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==i),ln(notary_vector[i,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.

//Select the notaries will non-zero observations
X_selection1_Q1=select(X_Q1, X_Q1[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q1=select(X_selection1_Q1, X_selection1_Q1[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
X_selection1_Q2=select(X_Q2, X_Q2[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q2=select(X_selection1_Q2, X_selection1_Q2[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.

//Calculate the consumer surplus with the new constellation of firms
consumer_surplus_market_counter=consumer_surplus(X_selection_Q1,betas_Q1,transportation_cost)+consumer_surplus(X_selection_Q2,betas_Q2,transportation_cost)
consumer_surplus_counter=sum(consumer_surplus_market_counter[,2])
change_CS=consumer_surplus_counter-consumer_surplus_current
//If the change in CS is positive, store this information and calculate the firm profits
if (consumer_surplus_pos[f,1]==0 & change_CS>0) {
consumer_surplus_pos[f,1]=change_CS
//Change in firm profits
per_firm_variable_profit_counter=per_firm_profit_mult_v2(X_selection_Q1,X_selection_Q2,betas_Q1,betas_Q2,p_Q1,p_Q2,nu,aL,phi,aM,alpha)
producer_surplus_counter=sum(per_firm_variable_profit_counter[,2])-fixed_cost*total_notaries_current //Note that the total number of notaries does not change!
producer_surplus_pos[f,1]=producer_surplus_counter-producer_surplus_current
} //End of loop where positive changes in consumer surplus are stored
//If the change in CS is negative, store this information and calculate the firm profits
if (consumer_surplus_neg[f,1]==0 & change_CS<0) {
//Change in consumer surplus
consumer_surplus_neg[f,1]=change_CS
//Change in firm profits
per_firm_variable_profit_counter=per_firm_profit_mult_v2(X_selection_Q1,X_selection_Q2,betas_Q1,betas_Q2,p_Q1,p_Q2,nu,aL,phi,aM,alpha)
producer_surplus_counter=sum(per_firm_variable_profit_counter[,2])-fixed_cost*total_notaries_current
producer_surplus_neg[f,1]=producer_surplus_counter-producer_surplus_current
} //End of loop where negative changes to consumer surplus are stored
//Remove the counterfactual notary from the data
notary_vector[i,1]=	notary_vector[i,1]-1 //Remove a notary from the notary vector in candidate switching location i, in order to add it in a new location in the next round.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==i),ln(notary_vector[i,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==i),ln(notary_vector[i,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
//If both the surplus positive and surplus negative are not filled, keep going
} //End of loop looking for an improvement and deterioration in consumer surplus
//At the end of the loop, add the original notary back into its location (f)
notary_vector[f,1]=	notary_vector[f,1]+1 
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==f),ln(notary_vector[f,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==f),ln(notary_vector[f,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
} //End of loop checking that there is already a notary in the location to begin with
} //End of loop going over each active notary and trying a switch
end
mata:
newid=X_Q1[.,1]
	end
	keep if common==1

sort newid
getmata consumer_surplus_pos
getmata producer_surplus_pos
getmata consumer_surplus_neg
getmata producer_surplus_neg
drop if NotPerOff==0

//Export the data on consumer surplus increase through switches in location
replace consumer_surplus_pos=. if consumer_surplus_pos==0
//This is the cost to producers that the switch would have generated
replace producer_surplus_pos=. if consumer_surplus_pos==.
//Export the data on consumer surplus decrease through switches in location
replace consumer_surplus_neg=. if consumer_surplus_neg==0
//This is the cost to producers that the switch would have generated
replace producer_surplus_neg=. if consumer_surplus_neg==.

******************************************
*3. Store the information for later use. *  
******************************************

keep bvdid_num consumer_surplus_pos producer_surplus_pos consumer_surplus_neg producer_surplus_neg
save "Estimates\Lambda_bounds\lambda_`=$select_arrond'.dta", replace
